{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# BNF、DSL解析"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "    first       ::=  letter | \"_\"\n",
    "    letter      ::=  \"a\" | \"b\" | ... \"z\" | \"A\" | \"B\" | ... | \"Z\"\n",
    "    digit       ::=  \"0\" | \"1\" | ... | \"9\"\n",
    "    rest        ::=  first | digit\n",
    "    identifier  ::=  first rest*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['_']\n",
      "['456sad']\n",
      "['a', 'a']\n",
      "['aa']\n",
      "['17']\n",
      "['17', '33', '88']\n",
      "['imaginary', 'farcical', 'aquatic', 'ceremony']\n",
      "['imaginary', ['farcical', 'aquatic', 'ceremony']]\n"
     ]
    }
   ],
   "source": [
    "from pyparsing import *\n",
    "\n",
    "first = Word(alphas+\"_\", exact=1)    #从目标字符串中找到参数1集合中也有的元素，选取1个元素（exact=1）；参数2还可以由min、max\n",
    "print first.parseString('_asdfghj')\n",
    "\n",
    "rest = Word(alphanums+\"_\")\n",
    "print rest.parseString('456sad')\n",
    "\n",
    "identifier = first + Optional(rest) #Optional等价于*，表示0或无限\n",
    "print identifier.parseString('aa')\n",
    "\n",
    "identifier = Combine(first+pp.Optional(rest)) #Combine代表，多个tokens相连，注意与上面区别，与Group的区别\n",
    "print identifier.parseString('aa')\n",
    "\n",
    "number = Word(nums)\n",
    "print number.parseString('17 33 88') #‘17 33 88’被空格符拆分成‘17’、‘33’、‘88’， Word(nums)匹配到‘17’\n",
    "numberList = OneOrMore(number)       #近似等用于Word(nums)+Word(nums)+Word(nums)\n",
    "print numberList.parseString('17 33 88')\n",
    "\n",
    "\n",
    "word = Word(alphas)\n",
    "phrase = OneOrMore(word)\n",
    "ungrouped = word + phrase\n",
    "print ungrouped.parseString('imaginary farcical aquatic ceremony')\n",
    "phrase = Group(OneOrMore(word)) #Group把多个tokens放到一个list中，注意与Combine区别，\n",
    "ungrouped = word + phrase\n",
    "print ungrouped.parseString('imaginary farcical aquatic ceremony')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## ParserElement类的子类"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Literal - 构建一个字符串用于精确匹配"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['hello']\n",
      "['hello']\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "['__class__',\n",
       " '__delattr__',\n",
       " '__doc__',\n",
       " '__format__',\n",
       " '__getattribute__',\n",
       " '__hash__',\n",
       " '__init__',\n",
       " '__iter__',\n",
       " '__name__',\n",
       " '__new__',\n",
       " '__reduce__',\n",
       " '__reduce_ex__',\n",
       " '__repr__',\n",
       " '__setattr__',\n",
       " '__sizeof__',\n",
       " '__str__',\n",
       " '__subclasshook__',\n",
       " 'close',\n",
       " 'gi_code',\n",
       " 'gi_frame',\n",
       " 'gi_running',\n",
       " 'next',\n",
       " 'send',\n",
       " 'throw']"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pyparsing as pp\n",
    "ni = pp.CaselessLiteral('hello')\n",
    "print ni.parseString('Hello')\n",
    "print ni.parseString('Hello1')\n",
    "\n",
    "k = pp.Keyword('hello')\n",
    "res = k.scanString('hello')\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Keyword - 精确匹配字符串\n",
    "   >matchString，匹配模式字符串\n",
    "   \n",
    "   >identChars=DEFAULT\\_KEYWORD\\_CHARS， 默认有效字符集合是所有大小写字母+\"_$\",这些集合之外的字符作为切割字符\n",
    "   \n",
    "   >caseless=False，默认区分大小写，设置成True等同于CaselessKeyword\n",
    "   \n",
    "*通常用于定义保留字，例如C语言if，else*"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['hello']\n",
      "['hello@']\n",
      "['hello']\n"
     ]
    }
   ],
   "source": [
    "import pyparsing as pp\n",
    "\n",
    "k = pp.Keyword('hello')\n",
    "print k.parseString('hello@world') #‘@’不在DEFAULT_KEYWORD_CHARS中，因此'hello@world'被切割成‘hello’和‘world’，matchstring为‘hello’刚好匹配上\n",
    "#反例，下例如法匹配上‘hello’\n",
    "#k = pp.Keyword('hello', identChars=pp.Keyword.DEFAULT_KEYWORD_CHARS+'@')\n",
    "#print k.parseString('hello@world')\n",
    "\n",
    "#正例,'@'不作为分割字符\n",
    "k = pp.Keyword('hello@')\n",
    "print k.parseString('hello@')\n",
    "#备注：分割字符集合=identChars集合的补集-matchstring中的字符集合\n",
    "\n",
    "#正例，忽略大小写，同等于CaselessKeyword\n",
    "k = pp.Keyword('hello',caseless=True)\n",
    "print k.parseString('Hello')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### CaselessKeyword - 忽略小写的精确匹配字符串"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['hello']\n"
     ]
    }
   ],
   "source": [
    "k = pp.CaselessKeyword('hello')\n",
    "print k.parseString('Hello')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 参考\n",
    "http://infohost.nmt.edu/tcc/help/pubs/pyparsing/web/index.html"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
